Take Home Exercise 1

Published

November 30, 2023

Modified

December 3, 2023

Getting Started

pacman::p_load(sf, tmap, sfdep, tidyverse, mapview)

Geospatial Data

busStops <- st_read(dsn = "data/geospatial",
                 layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex1/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21

Plotting Bus Stop points

tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(busStops) +
  tm_dots()

Hexagonal Grid Creation

grid = st_make_grid(busStops, c(500), what = "polygons", square = FALSE)
# To sf and add grid ID
grid_sf = st_sf(grid) %>%
  # add grid ID
  mutate(grid_id = 1:length(lengths(grid)))
grid_sf$n_colli = lengths(st_intersects(grid_sf, busStops))

# remove grid without value of 0 (i.e. no points in side that grid)
grid_count = filter(grid_sf,n_colli > 0 )
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(grid_count) +
  tm_fill(
    col = "n_colli",
    palette = "Blues",
    style = "cont",
    title = "Number of collisions",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of collisions: " = "n_colli"
    ),
    popup.format = list(
      n_colli = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7)

removing bus stop grids outside of singapore

grid_count_rm <- grid_count %>%
  filter(!grid_id == 1767,
         !grid_id == 2073,
         !grid_id == 2135,
         !grid_id == 2104)

Aspatial Data

busTrips <- read_csv("data/aspatial/origin_destination_bus_202308.csv")
Rows: 5709512 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202309.csv")
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202310.csv")

busTrips$ORIGIN_PT_CODE <- as.factor(busTrips$ORIGIN_PT_CODE)
busTrips$DESTINATION_PT_CODE <- as.factor(busTrips$DESTINATION_PT_CODE)

Data wrangling

Aspatial Data Wrangling

calculating bus trip in weekday and morning peak

busTripsDayMorning <- busTrips %>%
  filter(DAY_TYPE == "WEEKDAY", 
         TIME_PER_HOUR >= 6, 
         TIME_PER_HOUR <= 9) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekdayMorningTrips = sum(TOTAL_TRIPS))

calculating bus trip in weekday and afternoon peak

busTripsDayAfternoon <- busTrips %>%
  filter(DAY_TYPE == "WEEKDAY", 
         TIME_PER_HOUR >= 17, 
         TIME_PER_HOUR <= 20) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekdayAfternoonTrips = sum(TOTAL_TRIPS))

calculating bus trip in weekend and morning peak

busTripsEndMorning <- busTrips %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY", 
         TIME_PER_HOUR >= 11, 
         TIME_PER_HOUR <= 14) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekendMorningTrips = sum(TOTAL_TRIPS))

calculating bus trip in weekend and evening peak

busTripsEndEvening <- busTrips %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY", 
         TIME_PER_HOUR >= 16, 
         TIME_PER_HOUR <= 19) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(WeekendEveningTrips = sum(TOTAL_TRIPS))

Combining the Peak Trips

BusTrips_comb <- busTripsDayMorning %>%
  left_join(busTripsDayAfternoon) %>%
  left_join(busTripsEndMorning) %>%
  left_join(busTripsEndEvening)
Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`

Geospatial Data Wrangling

grid_bus <- st_join(grid_count_rm,busStops,join = st_contains) %>%
  unnest() %>%
  select(grid_id,BUS_STOP_N)
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(grid)`.
grid_bus$BUS_STOP_N <- as.factor(grid_bus$BUS_STOP_N)

Joining aspatial and geospatial

Trips <- left_join(BusTrips_comb,grid_bus,
                   by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  group_by(grid_id)%>%
  summarise(WeekdayMorningTrips = sum(WeekdayMorningTrips),
            WeekdayAfternoonTrips = sum(WeekdayAfternoonTrips),
            WeekendMorningTrips = sum(WeekendMorningTrips),
            WeekendEveningTrips = sum(WeekendEveningTrips))
Trips <- left_join(grid_count_rm,Trips) %>%
  mutate (Total_Trips = WeekdayMorningTrips+WeekdayAfternoonTrips+WeekendMorningTrips  +WeekendEveningTrips) %>% 
  rename (n_bus = n_colli)
Joining with `by = join_by(grid_id)`

Trip per bus stop, to see if the trip amount is caused by number of bus station or is it really packed

TripsPerBusStop <- Trips %>%
  mutate (WeekdayMorningTrips = WeekdayMorningTrips/n_bus,
          WeekdayAfternoonTrips = WeekdayAfternoonTrips/n_bus,
          WeekendMorningTrips = WeekendMorningTrips/n_bus,
          WeekendEveningTrips = WeekendEveningTrips/n_bus)
TripsLog <- Trips %>%
  mutate (WeekdayMorningTrips = log(WeekdayMorningTrips),
          WeekdayAfternoonTrips = log(WeekdayAfternoonTrips),
          WeekendMorningTrips = log(WeekendMorningTrips),
          WeekendEveningTrips = log(WeekendEveningTrips))

Visualising

tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(Trips) +
  tm_fill(
    col = "Total_Trips",
    palette = "Blues",
    style = "quantile",
    title = "Total Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.format = list(
      Total_Trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7)
Warning: popup.vars not specified whereas popup.format is a list

The plot above are from total bus trip per origin bus station.

From the plot of total trips above, can be seen that bus trips are spreaded around across the country. However, there are several clusters can be seen.

Total Trips

weekday_morning <- tm_shape(Trips) +
  tm_fill(
    col = "WeekdayMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekday_afternoon <- tm_shape(Trips) +
  tm_fill(
    col = "WeekdayAfternoonTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Afternoon Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_morning <- tm_shape(Trips) +
  tm_fill(
    col = "WeekendMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_evening <- tm_shape(Trips) +
  tm_fill(
    col = "WeekendEveningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Evening Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
tmap_mode("plot")
tmap mode set to plotting
tmap_arrange(weekday_morning, weekday_afternoon, weekend_morning, weekend_evening,
            ncol=2, nrow=2)

The plot above are segragated trips divided into 4 categories: Weekday morning, Weekday afternoon, Weekend morning, and weekend evening. There are only minimal differences between them. However a noticable different is on the east side. The east side quantile is lower on the evening or afternoon than in the morning both on weekend and weekday. Also, in the central can be seen that they have a higher quantile during the afternoon and evening.

Total Trips per Bus Stop

weekday_morning_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekdayMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekday_afternoon_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekdayAfternoonTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Afternoon Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_morning_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekendMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_evening_per_stop <- tm_shape(TripsPerBusStop) +
  tm_fill(
    col = "WeekendEveningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Evening Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
tmap_mode("plot")
tmap mode set to plotting
tmap_arrange(weekday_morning_per_stop, weekday_afternoon_per_stop,
             weekend_morning_per_stop, weekend_evening_per_stop,
            ncol=2, nrow=2) 

the plot above shows the differences of total number of trips based of the time of day and the day. The number of total trip in a hexagon are divided per each number of bus station in the hexagon. However, the result are not change by a lot as well.

Log value of trips

weekday_morning_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekdayMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Morning Trips per Bus Stop",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekday_afternoon_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekdayAfternoonTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekday Afternoon Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_morning_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekendMorningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Morning Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
weekend_evening_log <- tm_shape(TripsLog) +
  tm_fill(
    col = "WeekendEveningTrips",
    palette = "Blues",
    style = "quantile",
    title = "Weekend Evening Trips",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6
  ) +
  tm_borders(col = "grey40", lwd = 0.7) +
  tm_legend(scale = 0.5)
tmap_mode("plot")
tmap mode set to plotting
tmap_arrange(weekday_morning_log, weekday_afternoon_log,
             weekend_morning_log, weekend_evening_log,
            ncol=2, nrow=2)

The above plot shows the value of total number of trip per hexagon after applying log to the number. There are also minimal differences between them.

Local Indicators of Spatial Association (LISA) Analysis

Spatial Weight

wm_idw <- Trips %>%
  mutate(nb = st_contiguity(grid),
         wts = st_inverse_distance(nb, grid,
                                   scale = 1,
                                   alpha = 1),
         .before = 1) %>%
  mutate(grid_id = 1:length(Trips$grid_id))
! Polygon provided. Using point on surface.
# wm_idw$WeekdayMorningTrips <- ifelse(is.na(wm_idw$WeekdayMorningTrips),
#                                      0.99,wm_idw$WeekdayMorningTrips)

Local Moran

Weekday Morning

lisa <- wm_idw %>% 
  mutate(local_moran = local_moran(
    WeekdayMorningTrips, nb, wts, nsim = 99, na.action=na.pass),
         .before = 1) %>%
  unnest(local_moran)
Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekdayMorningTrips, nb, wts, nsim =
  99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran <- tm_shape(lisa) +
  tm_fill("ii") + 
  tm_borders(alpha = 0.5) +
  tm_view(set.zoom.limits = c(6,8)) +
  tm_layout(main.title = "local Moran's I of Trip",
            main.title.size = 0.8)
p_val_moran <- tm_shape(lisa) +
  tm_fill("p_ii_sim") + 
  tm_borders(alpha = 0.5) +
   tm_layout(main.title = "p-value of local Moran's I",
            main.title.size = 0.8)
tmap_arrange(ii_val_moran, p_val_moran, ncol = 2)
Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig <- lisa  %>%
  filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)

tm_shape(lisa) +
  tm_polygons() +
  tm_borders(alpha = 0.5) +
tm_shape(lisa_sig) +
  tm_fill("mean", palette = pal ) + 
  tm_borders(alpha = 0.4)
Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).